p. 75

pos = replicate(1000, sum(runif(16,-1,1)))
hist(pos)

p.76

big = replicate(1e4, prod(1 + runif(12, 0, 0.5)))
small = replicate(1e4, prod(1 + runif(12, 0, 0.01)))

dens(small, norm.comp=TRUE)

dens(big, norm.comp=TRUE)

logbig = replicate(1e4, log(prod(1 + runif(12, 0, 0.5))))
dens(logbig, norm.comp=TRUE)

p. 81

data(Howell1)

df = Howell1

head(df)
##    height   weight age male
## 1 151.765 47.82561  63    1
## 2 139.700 36.48581  63    0
## 3 136.525 31.86484  65    0
## 4 156.845 53.04191  41    1
## 5 145.415 41.27687  51    0
## 6 163.830 62.99259  35    1
precis(df)
##               mean         sd      5.5%     94.5%     histogram
## height 138.2635963 27.6024476 81.108550 165.73500 ▁▁▁▁▁▁▁▂▁▇▇▅▁
## weight  35.6106176 14.7191782  9.360721  54.50289 ▁▂▃▂▂▂▂▅▇▇▃▂▁
## age     29.3443934 20.7468882  1.000000  66.13500     ▇▅▅▃▅▂▂▁▁
## male     0.4724265  0.4996986  0.000000   1.00000    ▇▁▁▁▁▁▁▁▁▇
df2 = df[df$age >= 18,]
dens(df2$height)

### p. 85

# Prior for height mean
curve(dnorm(x, 178, 20), from=100, to=250)

# Prior for height std
curve(dunif(x, 0, 50), from=-10, to=60)

sample_mu = rnorm(1e4, 178, 20)
sample_sigma = runif(1e4, 0, 50)
prior_h = rnorm(1e4, sample_mu, sample_sigma)
dens(prior_h)

sample_mu = rnorm(1e4, 178, 100)
prior_h = rnorm(1e4, sample_mu, sample_sigma)
dens(prior_h)

p. 87

mu.list = seq(from=150, to=160, length.out=100)
sigma.list = seq(from=7, to=9, length.out=100)
post = expand.grid(mu=mu.list, sigma=sigma.list)

post$LL = sapply(1:nrow(post), function(i) sum(dnorm(df2$height, post$mu[i], post$sigma[i], log=TRUE)))
post$prod = post$LL + dnorm(post$mu, 178, 20, log=TRUE)
post$prob = exp(post$prod - max(post$prod))
image_xyz(post$mu, post$sigma, post$prob)

p. 88: sampling from the posterior

sample.rows = sample(1:nrow(post), size=1e4, replace=TRUE, prob=post$prob)
sample.mu = post$mu[sample.rows]
sample.sigma = post$sigma[sample.rows]
plot(sample.mu, sample.sigma, cex=0.5, pch=16, col=col.alpha(rangi2, 0.1))

dens(sample.mu)

dens(sample.sigma)

PI(sample.mu)
##       5%      94% 
## 153.9394 155.2525
PI(sample.sigma)
##       5%      94% 
## 7.303030 8.252525

p.89: overthinking

df3 = sample(df2$height, size=20)
mu.list = seq(from=150, to=170, length.out=200)
sigma.list = seq(from=4, to=20, length.out=200)
post2 = expand.grid(mu=mu.list, sigma=sigma.list)
post2$LL = sapply( 1:nrow(post2), function(i) sum(dnorm(df3, mean=post2$mu[i], sd=post2$sigma[i], log=TRUE)))

post2$prod = post2$LL + dnorm(post2$mu, 178, 20, TRUE) + dunif(post2$sigma, 0, 50, TRUE)
post2$prob = exp(post2$prod - max(post2$prod))
sample2.rows = sample(1:nrow(post2), size=1e4, replace=TRUE, prob=post2$prob)
sample2.mu = post2$mu[sample2.rows] 
sample2.sigma = post2$sigma[sample2.rows] 
plot(sample2.mu, sample2.sigma, cex=0.5, col=col.alpha(rangi2,0.1), xlab="mu", ylab="sigma", pch=16)

dens(sample2.sigma, norm.comp=TRUE)

p.90 Finding the posterior distribution with quap

flist = alist(
  height ~ dnorm(mu, sigma),
  mu ~ dnorm(178, 20),
  sigma ~ dunif(0, 50)
)

m4.1 = quap(flist, data=df2)

precis(m4.1)
##             mean        sd       5.5%      94.5%
## mu    154.607234 0.4119549 153.948850 155.265618
## sigma   7.730586 0.2913157   7.265008   8.196165
flist = alist(
  height ~ dnorm(mu, sigma),
  mu ~ dnorm(178, 0.1),
  sigma ~ dunif(0, 50)
)

m4.2 = quap(flist, data=df2)

precis(m4.2)
##            mean        sd      5.5%     94.5%
## mu    177.86375 0.1002354 177.70356 178.02395
## sigma  24.51757 0.9289237  23.03297  26.00217
mcov = vcov(m4.1)
diag(mcov)
##         mu      sigma 
## 0.16970686 0.08486483
cov2cor(mcov)
##                mu       sigma
## mu    1.000000000 0.001854566
## sigma 0.001854566 1.000000000
post = extract.samples(m4.1, n=1e4)
head(post)
##         mu    sigma
## 1 154.1115 8.361579
## 2 154.5655 7.765194
## 3 154.4405 7.925284
## 4 154.2046 7.973574
## 5 154.4444 7.622452
## 6 154.3674 7.866277
precis(post)
##             mean        sd       5.5%      94.5%   histogram
## mu    154.606368 0.4112522 153.953827 155.256155    ▁▁▁▅▇▂▁▁
## sigma   7.729598 0.2943963   7.265501   8.203861 ▁▁▁▂▅▇▇▃▁▁▁
plot(post)

p94. Linear prediction

plot(df2$height ~ df2$weight)

set.seed(2971)

N = 100
a = rnorm(N, 178, 20)
b = rnorm(N, 0, 10)

plot_lines = function() {
  plot(NULL, xlim=range(df2$weight), ylim=c(-100,400), xlab="weight", ylab="height")
  abline(h=0, lty=2)
  abline(h=272, lty=1, lwd=0.5)
  mtext("b ~ dnorm(0,10)")
  xbar = mean(df2$weight)
  for(i in 1:N) curve(a[i] + b[i]*(x - xbar), from=min(df2$weight) , to=max(df2$weight) , add=TRUE , col=col.alpha("black",0.2))
}

plot_lines()

b = rlnorm(1e4, 0, 1)
dens(b, xlim=c(0,5), adj=0.1)

set.seed(2971)
N = 100
a = rnorm(N, 178, 20)
b = rlnorm(N, 0, 1)
plot_lines()

p.100 find the posterior

xbar = mean(df2$weight)

flist = alist(
  height ~ dnorm(mu, sigma),
  mu <- a + b * (weight - xbar),
  a ~ dnorm(178, 20),
  b ~ dlnorm(0, 1),
  sigma ~ dunif(0, 50)
)

m4.3 = quap(flist, data=df2)

precis(m4.3)
##              mean         sd        5.5%       94.5%
## a     154.6013671 0.27030766 154.1693633 155.0333710
## b       0.9032807 0.04192363   0.8362787   0.9702828
## sigma   5.0718809 0.19115478   4.7663786   5.3773831
round(vcov(m4.3), 3)
##           a     b sigma
## a     0.073 0.000 0.000
## b     0.000 0.002 0.000
## sigma 0.000 0.000 0.037

p.103 plotting the posterior

plot(height ~ weight, data=df2, col=rangi2)
post = extract.samples(m4.3)
a_map = mean(post$a)
b_map = mean(post$b)
curve(a_map + b_map * (x - xbar), add=TRUE)

plot_n_curves = function(N) {
  dfN = df2[1:N,]
  mN = quap(flist, data=dfN)
  post = extract.samples(mN, n=N)

  # display raw data and sample size
  plot(dfN$weight, dfN$height,
       xlim=range(df2$weight), ylim=range(df2$height),
       col=rangi2, xlab="weight", ylab="height")
  
  mtext(concat("N = ", N))

  # plot the lines, with transparency
  xbar = mean(dfN$weight)
  for (i in 1:N) {
    curve(post$a[i] + post$b[i] * (x - xbar), col=col.alpha("black", 0.3), add=TRUE)
  }
}


plot_n_curves(20)

plot_n_curves(30)

plot_n_curves(40)

plot_n_curves(50)

post = extract.samples(m4.3)
mu_at_50 = post$a + post$b * (50 - xbar)
dens(mu_at_50, col=rangi2, lwd=2, xlab="mu|weight=50")

weight.seq = seq(from=25, to=70, by=1)
mu = link(m4.3, data=data.frame(weight=weight.seq)) # Book p. 110
plot(height ~ weight, df2, type="n")

for (i in 1:N) {
  points(weight.seq, mu[i,], pch=16, col=col.alpha(rangi2, 0.1))
}

mu.mean = apply(mu, 2, mean)
mu.PI = apply(mu, 2, PI, prob=0.89)

plot(height ~ weight, data=df2, col=col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)

sim.height = sim(m4.3, data=list(weight=weight.seq), n=1e4)
height.PI = apply(sim.height, 2, PI, prob=0.89)

plot(height ~ weight, df2, col=col.alpha(rangi2, 0.5))
lines(weight.seq, mu.mean)
shade(mu.PI, weight.seq)
shade(height.PI, weight.seq)

p. Curves from lines

poly2 = alist(
  height ~ dnorm(mu, sigma),
  mu <- a + b1 * weight_s + b2 * weight_s2,
  a ~ dnorm(178, 20),
  b1 ~ dlnorm(0, 1),
  b2 ~ dnorm(0, 1),
  sigma ~ dunif(0, 50)
)

poly3 = alist(
  height ~ dnorm(mu, sigma),
  mu <- a + b1 * weight_s + b2 * weight_s2 + b3 * weight_s3,
  a ~ dnorm(178, 20),
  b1 ~ dlnorm(0, 1),
  b2 ~ dnorm(0, 1),
  b3 ~ dnorm(0, 1),
  sigma ~ dunif(0, 50)
)


df$weight_s = (df$weight - mean(df$weight)) / sd(df$weight)
df$weight_s2 = df$weight_s ^ 2
df$weight_s3 = df$weight_s ^ 3

plot_model = function(poly, degree=2) {
  model = quap(poly, data=df)
  
  weight.seq = seq(from=-2.2, to=2, length.out=30)
  
  if (degree == 2) pred_dat = list(weight_s=weight.seq, weight_s2=weight.seq^2) 
  else pred_dat = list(weight_s=weight.seq, weight_s2=weight.seq ^ 2, weight_s3=weight.seq^3) 
  
  mu = link(model, data=pred_dat)
  mu.mean = apply(mu, 2, mean)
  mu.PI = apply(mu, 2, PI, prob=0.89)
  sim.height = sim(model, data=pred_dat)
  height.PI = apply(sim.height, 2, PI , prob=0.89)
  
  plot(height ~ weight_s, df, col=col.alpha(rangi2, 0.5))
  lines(weight.seq, mu.mean)
  shade(mu.PI, weight.seq)
  shade(height.PI, weight.seq)
}
  
plot_model(poly2)

plot_model(poly3, degree=3)